#!/bin/bash
set -euo pipefail

# ----- CONFIGURATION -----
# COLUMNS is also a variable that bash maintains itself: while the
# `checkwinsize` shell option is enabled, the shell may overwrite it with the
# terminal width after an external command finishes. Turn the option off so
# the value configured below is the one that reaches cdaig.
shopt -u checkwinsize

INPUT="NC_000913_3_CDS_1000_chunks.fasta"   # FASTA file, looked up in the script directory
FORMAT="png"                                # output type handed to `dot -T`
ROWS=2                                      # 2nd cdaig argument; ROWS-1 mutated copies are merged with the base
COLUMNS=1000                                # 3rd cdaig argument
TSTV=2.73                                   # 4th mutator argument (presumably the Ts/Tv ratio -- confirm)
MUTATIONS=(0 10 20 30 40 50 60 70 80 90 100)  # values passed to mutator, one CSV row per value
EXECUTIONS=3                                # repetitions per (base file, mutation value) pair
SRC_FILES=(mutator.c merge.c main.c cdaig.c hopcroft.c)  # sources copied into every execution folder
CSV_NAME="results.csv"                      # summary file created inside the experiment folder
# --------------------------

# Script directory (absolute path of the folder containing this script)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# ----- RESULTS DIRECTORY -----

# next_experiment_number PARENT_DIR
# Prints one more than the highest N found among PARENT_DIR/Experiment_<N>
# entries, or 1 when there is none. Uses a glob instead of parsing `ls`, so a
# parent without any Experiment_* entry does not produce a failing pipeline
# (which would abort the script under `set -euo pipefail`).
next_experiment_number() {
    local parent=$1
    local entry suffix
    local highest=0

    for entry in "$parent"/Experiment_*; do
        suffix=${entry##*/Experiment_}
        # Skips the unexpanded pattern (no match) and non-numeric suffixes.
        [[ $suffix =~ ^[0-9]+$ ]] || continue
        # 10# forces base 10 so a suffix such as "08" is not read as octal.
        if ((10#$suffix > highest)); then
            highest=$((10#$suffix))
        fi
    done

    printf '%d\n' "$((highest + 1))"
}

next_exp=$(next_experiment_number "$SCRIPT_DIR")
BASE_DIR="${SCRIPT_DIR}/Experiment_${next_exp}"
mkdir -p "$BASE_DIR"

echo "Experiment folder: $BASE_DIR"

# ----- Build fasta_extractor and run it from inside the experiment folder -----
# The working directory becomes $BASE_DIR, so the compiled binary and whatever
# fasta_extractor writes relative to the current directory end up there.
extractor_cflags=(-fsanitize=address -g -Wall -Wextra)
extractor_src="${SCRIPT_DIR}/fasta_extractor.c"
extractor_input="${SCRIPT_DIR}/${INPUT}"

cd "$BASE_DIR"
gcc "${extractor_cflags[@]}" -o fasta_extractor "$extractor_src"
./fasta_extractor "$extractor_input" 1

# ----- CSV INIT -----
# Header layout: the two key columns, one (e, v, suff) triple per execution,
# then min/max/avg/std for each of the three metrics.
CSV_FILE="${BASE_DIR}/${CSV_NAME}"

header="Sequence,Mutation"
run=1
while ((run <= EXECUTIONS)); do
    header="${header},e${run},v${run},suff${run}"
    run=$((run + 1))
done
for metric in e v suff; do
    header="${header},${metric}_min,${metric}_max,${metric}_avg,${metric}_std"
done

# Truncates/creates the CSV so it starts with the header line only.
printf '%s\n' "$header" > "$CSV_FILE"

# ----- HELPERS USED BY THE EXPERIMENT LOOP -----

# compute_stats VALUE...
# Prints "min,max,avg,std" for the given numbers: min/max are echoed as they
# were passed in, avg and std are formatted with two decimals. std is the
# population standard deviation (divides by N) and is derived from the
# unrounded mean. With no arguments it prints "0,0,0.00,0.00".
compute_stats() {
    if (($# == 0)); then
        printf '0,0,0.00,0.00\n'
        return 0
    fi
    # LC_ALL=C keeps "." as the decimal separator so the CSV stays parseable.
    printf '%s\n' "$@" | LC_ALL=C awk '
        {
            vals[NR] = $1
            sum += $1
            if (NR == 1 || $1 + 0 < min + 0) min = $1
            if (NR == 1 || $1 + 0 > max + 0) max = $1
        }
        END {
            mean = sum / NR
            for (i = 1; i <= NR; i++) {
                d = vals[i] - mean
                ss += d * d
            }
            printf "%s,%s,%.2f,%.2f\n", min, max, mean, sqrt(ss / NR)
        }'
}

# parse_cdaig_counts TEXT
# Looks for the first "v=<n> e=<n> suff=<n>" group in TEXT and prints
# "<v> <e> <suff>". Prints "0 0 0" when TEXT has no such group. Always returns
# 0, so a missing group cannot abort the script under `set -e`/`pipefail`.
parse_cdaig_counts() {
    local text=$1
    local pattern='v=([0-9]+)[[:space:]]+e=([0-9]+)[[:space:]]+suff=([0-9]+)'

    if [[ $text =~ $pattern ]]; then
        printf '%s %s %s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}"
    else
        printf '0 0 0\n'
    fi
}

# Flags shared by every tool build below.
tool_cflags=(-fsanitize=address -g -Wall -Wextra)

# Enable nullglob so globs without matches expand to nothing
shopt -s nullglob

# ----- LOOP OVER SEQUENCE DIRECTORIES GENERATED BY fasta_extractor -----
# Layout created per base file: <sequence dir>/MUT_<M>/Exec_<k>/ holding a
# private copy of the input, the sources and the freshly built tools.
# One CSV row is appended per (base file, mutation value) pair.
for SEQ_DIR in "$BASE_DIR"/*/; do
    [ -d "$SEQ_DIR" ] || continue
    echo ">>> Processing sequence $SEQ_DIR"
    # The glob leaves a trailing slash; drop it to avoid "//" in derived paths.
    SEQ_DIR="${SEQ_DIR%/}"

    for TXT in "$SEQ_DIR"/*.txt; do
        [ -f "$TXT" ] || continue
        BASE_FILE="$(basename "$TXT")"
        echo "  -> Base file: $BASE_FILE"

        for M in "${MUTATIONS[@]}"; do
            echo "     -> Mutations=$M"
            MUT_DIR="${SEQ_DIR}/MUT_${M}"
            mkdir -p "$MUT_DIR"

            row="${BASE_FILE},$M"
            e_values=()
            v_values=()
            suff_values=()

            for ((exec_i = 1; exec_i <= EXECUTIONS; exec_i++)); do
                EXEC_DIR="${MUT_DIR}/Exec_${exec_i}"
                mkdir -p "$EXEC_DIR"

                # Copy input and sources (sources that do not exist are skipped)
                cp "$TXT" "$EXEC_DIR/"
                for f in "${SRC_FILES[@]}"; do
                    if [ -f "${SCRIPT_DIR}/${f}" ]; then
                        cp "${SCRIPT_DIR}/${f}" "$EXEC_DIR/"
                    fi
                done
                for h in "${SCRIPT_DIR}"/*.h; do
                    if [ -f "$h" ]; then
                        cp "$h" "$EXEC_DIR/"
                    fi
                done

                cd "$EXEC_DIR"

                # Compile tools
                if [ -f "mutator.c" ]; then
                    gcc "${tool_cflags[@]}" -o mutator mutator.c
                fi
                if [ -f "merge.c" ]; then
                    gcc "${tool_cflags[@]}" -o merger merge.c
                fi
                gcc "${tool_cflags[@]}" -o cdaig main.c cdaig.c hopcroft.c

                # ---- Run mutations ----
                # TMP accumulates the merge result: each pass merges one more
                # mutated copy into it.
                TMP="$BASE_FILE.tmp"
                cp "$BASE_FILE" "$TMP"
                MERGE_OUTPUT="$BASE_FILE.merged"

                for ((i = 1; i < ROWS; i++)); do
                    MUTATED="${BASE_FILE%.txt}_mutated_$i.txt"
                    ./mutator "$BASE_FILE" "$M" "$i" "$TSTV"
                    ./merger "$TMP" "$MUTATED" "$MERGE_OUTPUT"
                    cp "$MERGE_OUTPUT" "$TMP"
                done
                rm -f "$TMP"

                # ---- Run CDAIG ----
                output=$(./cdaig "$MERGE_OUTPUT" "$ROWS" "$COLUMNS")
                # Falls back to 0/0/0 when cdaig printed no "v= e= suff=" group.
                read -r v_val e_val suff_val <<<"$(parse_cdaig_counts "$output")"

                e_values+=("$e_val")
                v_values+=("$v_val")
                suff_values+=("$suff_val")
                row+=",$e_val,$v_val,$suff_val"

                # Generate PNG if DOT exists
                DOTFILE="$MERGE_OUTPUT.cdaig.dot"
                OUTPUT="$MERGE_OUTPUT.cdaig"
                if [ -f "$DOTFILE" ]; then
                    dot -T"$FORMAT" "$DOTFILE" -o "$OUTPUT.$FORMAT"
                fi

                cd "$BASE_DIR"
            done

            # ---- Compute statistics including standard deviation ----
            # Column order matches the header: e_*, v_*, suff_* (min,max,avg,std).
            # The length check avoids expanding an empty array, which older
            # bash versions reject under `set -u`.
            if ((${#e_values[@]} > 0)); then
                e_stats=$(compute_stats "${e_values[@]}")
                v_stats=$(compute_stats "${v_values[@]}")
                suff_stats=$(compute_stats "${suff_values[@]}")
            else
                e_stats=$(compute_stats)
                v_stats=$e_stats
                suff_stats=$e_stats
            fi

            # Append stats to row
            row+=",${e_stats},${v_stats},${suff_stats}"

            # Write to CSV
            printf '%s\n' "$row" >> "$CSV_FILE"
        done
    done
done

shopt -u nullglob

# Final summary: where the experiment folder and the aggregated CSV live.
printf '%s\n' \
    "All experiments completed. Results stored in $BASE_DIR" \
    "Summary CSV: $CSV_FILE"

